1. 模型选择
划分训练集与测试集
from sklearn.model_selection import train_test_split

# Split the feature matrix X and the target y into a training part and a
# held-out test part. random_state=0 fixes the shuffling seed so the same
# split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
2. 模型评估
2.1. 分类问题
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
)

# Accuracy: fraction of test samples whose prediction equals the true label.
score = accuracy_score(y_test, y_pred)
score
# >>> 0.8349474338391424

# Text report with per-class precision / recall / F1 / support.
report_str = classification_report(y_test, y_pred)
print(report_str)
# >>>
#               precision    recall  f1-score   support
#
#          0.0       0.90      0.79      0.84     21521
#          1.0       0.77      0.89      0.83     17097
#
#     accuracy                           0.83     38618
#    macro avg       0.84      0.84      0.83     38618
# weighted avg       0.84      0.83      0.84     38618

# Confusion matrix (rows: true class, columns: predicted class).
array = confusion_matrix(y_test, y_pred)
array
# >>> array([[17097,  4424],
#            [ 1950, 15147]])

# F1 score.
# The function is imported directly (the original called
# sklearn.metrics.f1_score without ever importing `sklearn`), and the result
# is stored under a different name so the imported function is not shadowed.
f1 = f1_score(y_test, y_pred)
f1
# >>> ~0.826
# NOTE(review): the original output for this cell was lost; the value above is
# derived from the confusion matrix shown, 2*15147 / (2*15147 + 4424 + 1950),
# assuming class 1.0 is the positive class -- confirm by re-running.
2.2. 回归指标
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# All three metrics compare the same ground truth (y_test) against the same
# predictions (y_pred); the original mixed y_true and y_test here.

# Mean absolute error (MAE)
mae = mean_absolute_error(y_test, y_pred)
mae
# >>> 1.12

# Mean squared error (MSE)
mse = mean_squared_error(y_test, y_pred)
mse
# >>> 2.22

# R2 score (coefficient of determination).
# The result is stored as `r2`, not `r2_score`: rebinding `r2_score` would
# replace the imported function with a float and break any second call.
r2 = r2_score(y_test, y_pred)
r2
# >>> 0.11
2.3. 聚类指标
群集指标
from sklearn.metrics import (
    adjusted_rand_score,
    homogeneity_score,
    v_measure_score,
)

# Adjusted Rand index
adjusted_rand_score(y_true, y_pred)

# Homogeneity
homogeneity_score(y_true, y_pred)

# V-measure
# Call the imported function directly; the original used
# `metrics.v_measure_score`, but no `metrics` name is ever imported.
v_measure_score(y_true, y_pred)